{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dataset Package"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# statsmodels ships datasets that are used in its examples, model tests and tutorials\n",
    "import statsmodels.api as sm\n",
    "\n",
    "# Index of every available dataset name (item) and the R package it belongs to:\n",
    "# https://vincentarelbundock.github.io/Rdatasets/datasets.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory where statsmodels stores downloaded datasets (original note: ~/statsmodels_data).\n",
    "# It is printed explicitly: a notebook only echoes the last expression of a cell,\n",
    "# so a bare call at this position would silently discard the path.\n",
    "print(sm.datasets.get_data_home())\n",
    "\n",
    "# cache=True saves the downloaded data under data_home, so later runs reuse the\n",
    "# cached copy instead of downloading it again.\n",
    "duncan_prestige = sm.datasets.get_rdataset(\"Duncan\", \"carData\", cache=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Type of the returned dataset object (statsmodels.datasets.utils.Dataset)\n",
    "# followed by the type of its .data payload, on one line.\n",
    "dataset_type, payload_type = type(duncan_prestige), type(duncan_prestige.data)\n",
    "print(dataset_type, payload_type)\n",
    "\n",
    "# The dataset's docstring describes what the data contains.\n",
    "dataset_description = duncan_prestige.__doc__\n",
    "print(dataset_description)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample datasets bundled with statsmodels are located under statsmodels/datasets.\n",
    "# Load the Longley dataset with both loaders so the returned containers can be compared.\n",
    "data_1 = sm.datasets.longley.load()\n",
    "data_2 = sm.datasets.longley.load_pandas()\n",
    "\n",
    "# Type of each returned dataset object, one per line. Passing the newline through\n",
    "# sep avoids the stray spaces print() puts around a positional newline argument.\n",
    "print(type(data_1), type(data_2), sep=\"\\n\")\n",
    "\n",
    "# Type of each .data payload. Original note: data_1.data is a numpy.recarray\n",
    "# (record array), so its fields can be accessed as attributes with \".\".\n",
    "# NOTE(review): recent statsmodels releases appear to return pandas objects from\n",
    "# load() as well - confirm against the installed version.\n",
    "print(type(data_1.data), type(data_2.data), sep=\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the full data table of the dataset loaded with load()\n",
    "# (shown automatically because it is the cell's last expression).\n",
    "data_1.data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# exog = exogenous (\"external cause\") variables, endog = endogenous (\"internal cause\") variable.\n",
    "# Print the endog column name, the exog column names and all column names, one per line.\n",
    "# Passing the newline through sep avoids the stray spaces print() puts around a\n",
    "# positional newline argument.\n",
    "print(data_1.endog_name, data_1.exog_name, data_1.names, sep=\"\\n\")\n",
    "\n",
    "# Values of the endogenous variable.\n",
    "print(data_1.endog)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Container types of the exogenous variables produced by load() and load_pandas().\n",
    "print(*[type(dataset.exog) for dataset in (data_1, data_2)])\n",
    "\n",
    "# Original note: the same tuple slice on data_2.exog, i.e. data_2.exog[:5, :],\n",
    "# raises an error. Presumably data_2.exog is a pandas DataFrame, where\n",
    "# .iloc[:5, :] would be the positional equivalent - TODO confirm.\n",
    "# NOTE(review): the slice below relies on load() returning a NumPy array; recent\n",
    "# statsmodels releases appear to return pandas objects from load() as well - confirm.\n",
    "\n",
    "# NumPy-style 2-D slicing on data_1.exog: first five rows, all columns.\n",
    "data_1.exog[0:5, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# raw_data is available on the dataset returned by load(); show its type.\n",
    "raw_data_type = type(data_1.raw_data)\n",
    "print(raw_data_type)\n",
    "\n",
    "# Original note (unconfirmed): the same lookup on the load_pandas() result,\n",
    "# print(type(data_2.raw_data)), may raise an error - TODO verify."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
